/********************************************************************************
Health system amenable mortality main analysis file
Analysis file
Anna Gage, agage@hsph.harvard.edu
June 20, 2018

This file is the main analysis file for the paper "Mortality due to low quality health systems in the Universal Health Coverage era: 
a systematic analysis of amenable deaths in 137 countries". In this file, we read in cleaned data from the GBD project (http://ghdx.healthdata.org/gbd-results-tool)
on the burden of disease for 61 conditions. Data is disaggregated by age group, sex, country, and condition. Data is previously cleaned to include the correct age-sex
groups, merged with country characteristic data, and each condition is assigned a utilization measure. 

In this do file, we first calculate the case fatality rates and rate ratios. We then calculate amenable mortality, and separate it between poor quality in non-utilization. 
We then conduct a number of sensitivity analyses. Finally, we calculate the uncertainty intervals. 
********************************************************************************/

*Calculate estimates of deaths to poor quality
set more off

use "$user/$analysis/Type 1 conditions.dta", clear
append using "$user/$analysis/type 2 conditions.dta"
append using "$user/$analysis/type 3 conditions.dta"

drop disease condition
encode disease2, gen(disease)
encode condition2, gen(condition)

*Merge in util data
merge m:1 country util_measure using "$user/$utilraw/country_util_shaped.dta"
drop if _merge!=3 //Drops Bermuda and US virgin islands from analysis (fine as high income) and drops the util data for countries where we don't have GBD (small islands)
drop _merge

rename pred_util util

lab var deaths_m "Total actual deaths (from GBD)"
lab var deaths_l "Lower estimate of actual deaths (from GBD)"
lab var deaths_u "Upper estimate of actual deaths (from GBD)"
lab var incidence "Incidence of disease (from IHME)"
lab var poi "Population of interest: type 1 population type 2 incidence type 3 prevalence"
lab var pop "Population of age/sex group"
lab var rate "Relevant rate: 1 death rate 2 incidence rate 3 prevalence rate"
lab var type "Condition group"
lab var util_measure "Utilization measure used"
lab var util "Utilization value including prediction"
lab var util_real "Util values with predicted set to missing"
lab var util_income "Average util value within income group"
lab var disease "Disease from GBD"
lab var condition "Condition from GBD (grouped diseases)"

drop util_gbd death_rate*

save "$user/$analysis/all conditions.dta", replace


*CFR and rate ratios
********************************************************************************
	
*Trim small numbers of deaths
sort country disease 
by country disease: egen tot_deaths = total(deaths_m) 
drop if tot_deaths<10 & overall_ref!=1 //Drops observations with negligible numbers of deaths for non-reference group countries			

		
*Rate ratios for each group:
sort age female disease
	by age female disease: egen ref_rate = mean(rate) if overall_ref==1
	by age female disease: egen ref_rate_lmic = mean(rate) if lmic_ref==1
sort gbd_region age female disease 
	by gbd_region age female disease: egen ref_rate_ntd = min(rate) if condition==12
	sort age female disease ref_rate
	by age female disease: carryforward ref_rate, replace
	sort age female disease ref_rate_lmic 
	by age female disease: carryforward ref_rate_lmic, replace
	
	gen rate_ratio = ref_rate/rate //Calculate the PR ratio
		gen rr_replace = rate_ratio>1 
		replace rate_ratio=1 if rate_ratio>1 //Only adjust prevelance when prevalence rate is lower in high income countries
		replace rate_ratio=1 if type==1 //Don't adjust for prevention in type 1 conditions, should be preventable in health system	
				
*CFRs 	
	rename deaths deaths_m //Rename to get the forloops to work
foreach est in m u l { 
	*local est = "m"
	gen cfr_`est' = deaths_`est'/poi
	drop if cfr_`est'==. //Drops observations that don't have complete poi data
		*If CFRs are over one, replace with country median CFR
		gen cfr_over1_`est' = cfr_`est'>1
		sort country disease
		by country disease: egen country_cfr_`est' = median(cfr_`est')
		replace cfr_`est' = country_cfr_`est' if cfr_over1_`est'==1 //replaces CFRs with country median
		*Trim outliers to 3 SD over the mean
		sort age female disease overall_ref
		by age female disease: egen mean_cfr_`est'= mean(cfr_`est')
		by age female disease: egen sd_cfr_`est' = sd(cfr_`est')
		gen sd_3_`est' = mean_cfr_`est'+3*sd_cfr_`est'
		gen outlier_`est' = cfr_`est'>sd_3_`est' & cfr_`est'!=. 
			replace cfr_`est' = sd_3_`est' if outlier_`est'==1 //Trims CFRs to 3 SD above mean
		drop cfr_over1_`est' country_cfr_`est' sd_cfr_`est' sd_3_`est' mean_cfr_`est' outlier_`est'	
		*Create reference groups
		by age female disease: egen ref_cfr_`est' = mean(cfr_`est') if overall_ref==1
		by age female disease: egen ref_cfr_lmic_`est' = mean(cfr_`est') if lmic_ref==1
		sort age female disease ref_cfr_`est'
		by age female disease: carryforward ref_cfr_`est', replace
		*cap gen replace = ref_cfr_`est'>cfr_`est' if lmic==1 & cfr_`est'!=.
		sort age female disease ref_cfr_lmic_`est'
		by age female disease: carryforward ref_cfr_lmic_`est', replace
		sort gbd_region age female disease
		by gbd_region age female disease: egen ref_cfr2_`est' = min(cfr_`est')	//For NTDs that weren't included in the other reference regions
		replace ref_cfr_`est' = ref_cfr2_`est' if ref_cfr_`est'==. | disease==43 
		replace ref_cfr_lmic_`est' = ref_cfr2_`est' if ref_cfr_lmic_`est'==. | disease==43		
		}	
	
		
	*Main results
********************************************************************************
foreach est in m u l {
	*Different CFR for non-utilizers
		sort country disease 
		by country disease: egen avg_cfr_`est' = mean(cfr_`est')
		sort disease gbd_region
		by disease gbd_region: egen quar_cfr_gbd_`est' = pctile(avg_cfr_`est'), p(75) //Top 75th percentile of country average CFRs by region
		gen cfr_ratio_`est' = quar_cfr_gbd_`est'/avg_cfr_`est'	//Ratio of 75th percentile to country average
	replace cfr_ratio_`est' = 1 if cfr_ratio_`est'<1	
	replace cfr_ratio_`est' = 3 if cfr_ratio_`est'>3
	
		gen amenable_`est' = (cfr_`est'-ref_cfr_`est')*rate_ratio*poi
			replace amenable_`est'=0 if amenable_`est'<0 //Where CFR is lower in an LMIC, we force amenable deaths to be 0 rather than negative
		gen pq_`est' = rate_ratio*poi*(cfr_`est'-(cfr_ratio_`est'*cfr_`est'*(1-util)+ref_cfr_`est'*util))
			cap gen replace = pq_m<0 if lmic==1 & cfr_m!=. 
			replace pq_`est'=0 if pq_`est'<0 //Where CFR is lower in an LMIC, we force pq deaths to be 0 rather than negative
		gen prev_`est' = deaths_`est'*(1-rate_ratio)	
		*Adverse medical events: All AMEs are considered due to poor quality care
			replace amenable_`est' = deaths_`est' if disease==13
			replace pq_`est' = deaths_`est' if disease==13
			replace prev_`est' = 0 if disease==13
		gen nonutil_`est' = amenable_`est'-pq_`est'
		gen avert_`est' = amenable_`est'+prev_`est'
	}


*Sensitivity results
********************************************************************************	
*LMIC reference sensitivity analysis
	foreach est in m u l {
		gen amenable_lmic_`est' = (cfr_`est'-ref_cfr_lmic_`est')*rate_ratio*poi
			replace amenable_lmic_`est'=0 if amenable_lmic_`est'<0
		gen pq_lmic_`est' = rate_ratio*poi*(cfr_`est'-(cfr_ratio_`est'*cfr_`est'*(1-util)+ref_cfr_lmic_`est'*util))	
			replace pq_lmic_`est' = 0 if pq_lmic_`est'<0	//Where CFR is lower in an LMIC, we force pq deaths to be 0 rather than negative
		*Adverse medical events
			replace amenable_lmic_`est' = deaths_`est' if disease==13
			replace pq_lmic_`est' = deaths_`est' if disease==13
		gen nonutil_lmic_`est' = amenable_lmic_`est'-pq_lmic_`est'
		gen avert_lmic_`est' = amenable_lmic_`est'+prev_`est'
	}	
	
	
	
*Utilization update sensitivity analysis

*Idea is that since 2002, countries may have improved their utilization rates to be comparable to those of the next income group up. Consequently, low income countries will get the 
*low middle utilization rate unless their rate is already higher, etc. 

gen util_alt= util
foreach util in asm_heart fev_di road_inj ever pap {
	forvalues i =1/4 {
	gen `util'_`i' = util_income if util_measure=="`util'" & income==`i'
	sort age female util_measure
	by age female util_measure: egen max_`util'_`i' = max(`util'_`i')
	}
	forvalues i=1/3 {
	local j = `i'+1
	replace max_`util'_`i' = max_`util'_`j' 
	}
	gen `util'_alt =.
	forvalues i=1/3 {
	replace `util'_alt = max_`util'_`i' if income==`i'
	}
	replace util_alt = `util'_alt if (`util'_alt>util | util_real==.) & util_measure=="`util'"
	}
	
	drop asm_heart_1-pap_alt

	foreach est in m u l {
		gen pq_util_`est' = rate_ratio*poi*(cfr_`est'-(cfr_ratio_`est'*cfr_`est'*(1-util_alt)+ref_cfr_`est'*util_alt))	
			replace pq_util_`est' = 0 if pq_util_`est'<0	//Where CFR is lower in an LMIC, we force pq deaths to be 0 rather than negative
		*Adverse medical events
			replace pq_util_`est' = deaths_`est' if disease==13
		gen nonutil_util_`est' = amenable_`est'-pq_util_`est'
	}	
		
	
*Leave the CFR for non-utilizers to be the country's prevailing rate sensitivity analysis	
foreach est in m u l {
	*by disease age female: egen med_cfr_`est' = pctile(cfr_`est'), p(50)
		gen amenable_cfr_`est' = (cfr_`est'-ref_cfr_`est')*rate_ratio*poi
			replace amenable_cfr_`est'=0 if amenable_cfr_`est'<0 //Where CFR is lower in an LMIC, we force amenable deaths to be 0 rather than negative
		gen pq_cfr_`est' = rate_ratio*poi*(cfr_`est'-(cfr_`est'*(1-util)+ref_cfr_`est'*util))
			cap gen replace_adj = pq_cfr_m<0 if lmic==1 & cfr_m!=.
			replace pq_cfr_`est'=0 if pq_cfr_`est'<0 //Where CFR is lower in an LMIC, we force pq deaths to be 0 rather than negative
		*Adverse medical events
			replace amenable_cfr_`est' = deaths_`est' if disease==13
			replace pq_cfr_`est' = deaths_`est' if disease==13
		gen nonutil_cfr_`est' = amenable_cfr_`est'-pq_cfr_`est'
		gen avert_cfr_`est' = amenable_cfr_`est'+prev_`est'	
	}		
	
save "$user/$analysis/all conditions.dta", replace


*PAF senstivity analysis
import delimited "$user/$gbdraw/risks 2016.csv", clear //This is a raw file from GBD that includes the behavioral and environmental/occupational risks for all 61 conditions in the analysis

replace val= 0 if val<0 //Some values are below 0 for NCDs in younger age groups-- ignore

encode rei, gen(risk)
drop measure metric year upper lower rei
reshape wide val, i(location sex age cause) j(risk)

gen jpaf = 1-(1-val1)*(1-val2)
replace jpaf = val1 if jpaf==.
replace jpaf = val2 if jpaf==.

drop val1 val2
rename (cause location) (disease country)
encode sex, gen(female)
	recode female (2=0)
encode age, gen(age_group)
	recode age_group (1=3) (2=4) (3=5) (4=6) (5=7) (6=8) (7=9) (8=10) (9=2) (10=11) (11=12) (12=13) (13=14) (14=15) (15=1)
	cap lab def age_lbl 1 "Under 5" 2 "5 to 9" 3 "10 to 14" 4 "15 to 19" 5 "20 to 24" 6 "25 to 29" 7 "30 to 34" 8 "35 to 39" 9 "40 to 44" 10 "45 to 49" 11 "50 to 54" 12 "55 to 59" 13 "60 to 64" 14 "64 to 70" 15 "70 to 74"
	lab val age_group age_lbl	
rename disease disease2
	
merge 1:1 disease2 country female age_group using "$user/$analysis/all conditions.dta"
drop if _merge==1 
drop _merge age sex

	foreach est in m u l {
		gen amenable_jpaf_`est' = (cfr_`est'-ref_cfr_`est')*jpaf*poi
			replace amenable_jpaf_`est' = 0 if amenable_jpaf_`est'<0
		gen pq_jpaf_`est' = jpaf*poi*(cfr_`est'-(cfr_ratio_`est'*cfr_`est'*(1-util)+ref_cfr_`est'*util))
		gen prev_jpaf_`est' = deaths_`est'*(1-jpaf)	
			replace pq_jpaf_`est' = 0 if pq_jpaf_`est'<0	//Where CFR is lower in an LMIC, we force pq deaths to be 0 rather than negative
		*Adverse medical events
			replace amenable_jpaf_`est' = deaths_`est' if disease==13
			replace pq_jpaf_`est' = deaths_`est' if disease==13
			replace prev_jpaf_`est' = 0 if disease==13
		gen nonutil_jpaf_`est' = amenable_jpaf_`est'-pq_jpaf_`est'
		gen avert_jpaf_`est' = amenable_jpaf_`est'+prev_jpaf_`est'
	}

	
*LIC UHC package sensitivity:
*drop if ((condition==2 & disease!=1) | condition==4 | condition==10 | disease==20 | disease==21 | disease==53 | disease==54) & (income==1 | income==2)
	//Drops cancers except cervical, congenital, mental health, CKD, poisonings, road injuries, from the LIC countries	
	
save "$user/$analysis/all conditions.dta", replace	
	
**Years of life lost 
********************************************************************************	
import delimited "$user/$gbdraw/Life expectancy.csv", clear

drop if age==1
gen age_group = _n
drop if age_group>15
gen life_exp = ex-2.5
drop age nqx lx ex

save "$user/$analysis/lifeexpectancy.dta", replace
use "$user/$analysis/all conditions.dta", clear
merge m:1 age_group using "$user/$analysis/lifeexpectancy.dta"
drop _merge

foreach est in m u l {
gen yll_`est' = pq_`est'*life_exp
gen yll_avert_`est' = avert_`est'*life_exp
}
		
	
*Organizing data
********************************************************************************
	lab var age_group "Age group"
	lab var util "Rate of utilization"
	lab var disease "Cause of death"
	lab var female "Female"
	lab var lmic "Low or middle income country"
	lab var tot_deaths "Total deaths per country and disease"
	lab var life_exp "Life expectancy (GBD Standard)"
	lab var ref_rate "Reference IR/PR/death rate"
	lab var ref_rate_lmic "LMIC reference IR/PR/death rate"
	lab var rate_ratio "Ratio of reference rate to LMIC rate"

	lab var ref_cfr_m "Reference CFR using hic reference group"
	lab var ref_cfr_lmic_m "Reference CFR using lmic reference group"
	lab var cfr_m "Actual case fatality rate"
	lab var cfr_ratio_m "Adjustment for non-utilizers"
	lab var avg_cfr_m "Country average CFR for disease"
	lab var amenable_m "Amenable to the health system"
	lab var prev_m "Preventable deaths"
	lab var pq_m "Deaths due to poor quality"
	lab var nonutil_m "deaths due to non-utilization"
	lab var avert_m "deaths that are avertable (preventable+amenable)"
	lab var amenable_lmic_m "Amenable deaths: lmic ref"
	lab var pq_lmic_m "Deaths due to poor quality: lmic ref"
	lab var nonutil_lmic_m "Deaths due to non-utilization: lmic ref"
	lab var yll_m "Years life lost due to poor quality"
	lab var yll_avert_m "Avoidable years life lost"
	lab var util_alt "Adjusted WHS rates for potential increase in utilization since 2002"
	lab var pq_util_m "deaths due to poor quality with adjusted WHS rates"
	lab var nonutil_util_m "deaths due to non-utilization with adjusted WHS rates"
	lab var jpaf "Percent of deaths due to behavioral and environmental risks"
	lab var amenable_jpaf_m "amenable deaths using JPAF"
	lab var prev_jpaf_m "preventable deaths using JPAF"
	lab var pq_jpaf_m " deaths due to poor quality using JPAF"
	lab var nonutil_jpaf_m "deaths due to non-utilization using JPAF"
	lab var avert_jpaf_m "deaths that are avertable using JPAF"
	lab var replace "LMIC CFR lower than reference group"
	
foreach est in l u {	
	lab var ref_cfr_`est' "`est' Reference CFR using hic reference group"
	lab var ref_cfr_lmic_`est' "`est' Reference CFR using lmic reference group"
	lab var cfr_`est' "`est' Actual case fatality rate"
	lab var amenable_`est' "`est' amenable deaths"
	lab var prev_`est' "`est' preventable deaths"
	lab var pq_`est' "`est' deaths due to poor quality"
	lab var nonutil_`est' "`est' deaths due to non-utilization"
	lab var avert_`est' "`est' avertable deaths"
	lab var pq_lmic_`est' "4Cs `est' deaths due to poor quality"
	lab var nonutil_lmic_`est' "4Cs `est' deaths due to non-utilization"
	lab var pq_util_`est' "`est' deaths due to poor quality with adjusted WHS rates"
	lab var nonutil_util_`est' "`est' deaths due to non-utilization with adjusted WHS rates"
	lab var prev_jpaf_`est' "`est' preventable deaths using JPAF"
	lab var pq_jpaf_`est' "`est' deaths due to poor quality using JPAF"
	lab var nonutil_jpaf_`est' "`est' deaths due to non-utilization using JPAF"
	lab var yll_`est' "`est' years life lost due to poor quality"
	lab var yll_avert_`est' "`est' avoidable years life lost" 
}	

order country age_group female type condition disease 
sort type condition disease country age_group female

	
save "$user/$analysis/all conditions.dta", replace	

*Uncertainty intervals
********************************************************************************
*Assumes that age/sex groups within country and disease are completely correlated- add 
*Assumes that country/diseases are completely independent and normally distributed. Calculate the variance, add the variance and then recalculate the confidence intervals. 

use "$user/$analysis/all conditions.dta", clear	 
	collapse (sum) amenable_* pq* prev_* nonutil* avert_* deaths* yll_* pop_millions (first) lmic , by(country disease) 
	foreach death in amenable amenable_lmic amenable_cfr amenable_jpaf pq pq_lmic pq_util pq_cfr pq_jpaf prev prev_jpaf ///
		nonutil nonutil_lmic nonutil_util nonutil_cfr nonutil_jpaf avert avert_lmic avert_cfr avert_jpaf yll deaths {
		gen var_`death' = ((`death'_u-`death'_l)/(2*invnorm(.975)))^2
		}
	collapse (sum) *_m var_*, by(lmic)
foreach death in amenable amenable_lmic amenable_cfr amenable_jpaf pq pq_lmic pq_util pq_cfr pq_jpaf prev prev_jpaf ///
		nonutil nonutil_lmic nonutil_util nonutil_cfr nonutil_jpaf avert avert_lmic avert_cfr avert_jpaf yll deaths {
	gen `death'_u = `death'_m+invnorm(.975)*sqrt(var_`death')
	gen `death'_l = `death'_m-invnorm(.975)*sqrt(var_`death')
	}

save "$user/$analysis/ui_results.dta", replace	

